import chardet
import re

class RefSlicer(object):
    """Cut reference entries out of a text using regular expressions.

    A reference section is the text between a match of ``start_expr`` and
    the next match of ``end_expr`` (or the end of the text when no end match
    follows). Each section is split into lines; ``line_head_expr`` and
    ``line_tail_expr`` optionally trim a prefix / suffix from every line.
    The resulting entries are accumulated in an internal list.
    """

    def __init__(self, start_expr, end_expr, line_head_expr, line_tail_expr):
        """Store the four regular-expression sources.

        Args:
            start_expr: Pattern marking the beginning of a reference section.
            end_expr: Pattern marking the end of a reference section.
            line_head_expr: Pattern whose match end is where an entry starts
                within a line; an empty value disables head trimming.
            line_tail_expr: Pattern whose match start is where an entry ends
                within a line; an empty value disables tail trimming.
        """
        self._start_expr = start_expr
        self._end_expr = end_expr
        self._line_head_expr = line_head_expr
        self._line_tail_expr = line_tail_expr
        self._ref_list = []
        self._text = ""

    def open(self, filepath: str):
        """Read ``filepath`` as bytes and decode it with a detected encoding.

        The encoding reported by ``chardet`` is used, with GB2312 widened to
        GB18030. The decoded text replaces the current text.
        """
        with open(filepath, 'rb') as f:
            raw = f.read()

        encoding = chardet.detect(raw)['encoding']
        if encoding is None:
            # Detection can fail (e.g. on an empty file); decode(None) would
            # raise TypeError, so fall back to UTF-8.
            encoding = "utf-8"
        elif encoding == "GB2312":
            encoding = "GB18030"

        self._text = raw.decode(encoding)

    def reset(self):
        """Clear both the loaded text and the collected entries."""
        self._text = ""
        self._ref_list = []

    def get_ref_count(self):
        """Return the number of collected entries."""
        return len(self._ref_list)

    def get_ref_list(self):
        """Return the internal list of collected entries (not a copy)."""
        return self._ref_list

    def find_all_refs(self):
        """Locate every reference section in the text and collect its entries.

        Entries are appended to the internal list; entries collected by
        earlier calls are kept.
        """
        start_re = re.compile(self._start_expr)
        end_re = re.compile(self._end_expr)
        remaining = self._text

        start_match = start_re.search(remaining)
        while start_match is not None:
            section_start = start_match.end()
            # Look for the end marker only *after* the start marker. Searching
            # from the beginning could pick an end match that overlaps the
            # start marker and cut the section away.
            end_match = end_re.search(remaining, section_start)
            if end_match is None:
                # Unterminated section: it runs to the end of the text, so
                # nothing is left to scan afterwards.
                self._slice_refs(remaining[section_start:])
                break

            self._slice_refs(remaining[section_start:end_match.start()])

            consumed = end_match.end()
            if consumed == 0:
                # Both patterns matched zero-width at index 0; force progress
                # so the loop cannot spin forever.
                if not remaining:
                    break
                consumed = 1
            remaining = remaining[consumed:]
            start_match = start_re.search(remaining)

    def _slice_refs(self, ref):
        """Split one reference section into lines and collect the entries.

        With neither line expression configured, every line is collected
        unchanged. Otherwise a line that does not match the head expression
        is skipped, and each kept line is trimmed to the span between the end
        of the head match and the start of the tail match (or the end of the
        line when the tail expression does not match).
        """
        if ref == "":
            return

        lines = ref.splitlines()
        head_re = re.compile(self._line_head_expr) if self._line_head_expr else None
        tail_re = re.compile(self._line_tail_expr) if self._line_tail_expr else None

        if head_re is None and tail_re is None:
            self._ref_list.extend(lines)
            return

        for line in lines:
            begin = 0
            # Exclusive slice bound: the full line length keeps the last
            # character when no tail match trims the line.
            end = len(line)

            if head_re is not None:
                head_match = head_re.search(line)
                if head_match is None:
                    continue
                begin = head_match.end()

            if tail_re is not None:
                # Search behind the head so a tail match inside the prefix
                # cannot produce an inverted (empty) slice.
                tail_match = tail_re.search(line, begin)
                if tail_match is not None:
                    end = tail_match.start()

            self._ref_list.append(line[begin:end])